In [1]:
%run "0. config.ipynb"


1887

In [2]:
# Load data/players_stats.csv. DataFrame.from_csv is deprecated (removed in
# pandas 1.0); read_csv with index_col=0 and parse_dates=True reproduces its
# defaults (first column as index, dates in the index parsed).
players_stats = pd.read_csv("data/players_stats.csv", index_col=0, parse_dates=True)

In [4]:
# Number of distinct sessionId values in the table. The parenthesised
# single-argument print behaves the same on Python 2 and Python 3.
print(len(players_stats["sessionId"].unique()))


53

In [57]:
# Tag every row with 1 so that summing the tag counts rows per session.
players_stats["count"] = 1

# One row per sessionId: number of rows seen ("count") and the sum of the
# "complete" column for that session.
session_id = (
    players_stats
    .groupby(["sessionId"])
    .agg({"count": np.sum, "complete": np.sum})
)

session_id.head()


Out[57]:
count complete
sessionId
05ba6db4-ac71-4953-a524-a4a9ebd9df91 1 0
069e1b60-538e-41d3-a6f5-29becc606521 1 0
0baf8bd8-8697-4628-99cb-538f8f5c0f24 2 0
0e1f8f26-dfd2-4265-b916-317435bf50ab 2 1
0ffa7953-c993-4a73-a93e-b3917484fdfb 1 0

In [58]:
# Histogram of the per-session "complete" totals.
session_id.hist(column="complete")


Out[58]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1134ea6d0>]], dtype=object)

In [60]:
# Rows per session ("count") against the summed "complete" value per session.
session_id.plot(x="count", y="complete", kind="scatter")


Out[60]:
<matplotlib.axes._subplots.AxesSubplot at 0x1135a1510>

Only one computer, the one used to play 13 games, completed 5 games. Most computers were used to play just one game (1 game / 1 computer).


In [56]:
# Build a frequency table of sessions by size: the index "class" is a session's
# row count (session_id["count"]) and the "count" column is how many sessions
# fall into that class.
df = pd.DataFrame()
df["class"] = session_id["count"]
# Drop the sessionId column that reset_index() creates from the index;
# `axis` is passed by keyword because the positional form is gone in pandas 2.
df = df.reset_index().drop("sessionId", axis=1)
df["count"] = 1
df = df.groupby(["class"]).agg({"count": np.sum})

# Sanity check: sum(class * sessions in that class) recovers the total number
# of rows that went into session_id["count"].
print(np.sum(df.index.values * df["count"].values))

# Same check as a dot product. `.values` replaces the deprecated as_matrix();
# after reset_index() the columns are [class, count].
A = df.reset_index().values
print(A[:, 0].dot(A[:, 1]))

df.head(20)


97
97
Out[56]:
count
class
1 39
2 9
4 1
9 1
13 1
14 1